## data analysis '53 paper 
## TIREM / without VHF results table

rm(list = ls())

library(readxl)
library(multiwayvcov)
library(lmtest)
library(Hmisc)

setwd("~/Dropbox/Current projects/1953/Replication archive")

jamming <- ""
adjust <- ""


table1 <- matrix(NA,45,6)
rownames(table1) <- c(	"intercept",
						"county capital",
						"KVP base",
						"$%$ agriculture and forestry",
						"$%$ mining",
						"$%$ metalworking industry",
						"$%$ chemical industry",
						"$%$ woods and plastics",
						"$%$ consumer goods",
						"$%$ construction",
						"$%$ transportation",
						"$%$ commerce",
						"$%$ services",
						"log(population size)",
						"$%$ population male",
						"population density",
						"$%$ population change 1939--50",
						"$%$ population in same {\\it Land}",
						"$%$ 8 years of education",
						"$%$ 10 years of education",
						"$%$ 12 years of education",
						"$%$ more than 12 years of education",
						"$%$ trade school degree",
						"$%$ university degree",
						"$%$ protestant",
						"$%$ catholic",
						"$%$ agnostic",
						"living space per capita (m$^2$)", 
						"$%$ turnout '32",
						"$%$ invalid votes '32",
						"$%$ NSDAP '32",
						"$%$ SPD '32",
						"$%$ KPD '32",
						"$%$ Zentrum '32",
						"$%$ DNVP '32",
						"$%$ DVP '32",
						"RIAS signal strength",
						"RIAS signal strength$^2$",
						"RIAS signal strength$^3$",
						"distance to East Berlin (100 km)",
						"distance to East Berlin$^2$ (100 km)",
						"distance to East Berlin$^3$ (100 km)",
						"Wald test RIAS $p$-value",
						"Wald test distance $p$-value",
						"district fixed effects")

table1[45,c(1,3,5)] <- "Yes"



########################################
## probit model 1 --- TIREM 4 (with VHF)
########################################
## choose signal strength measure to employ
signal <- "tirem_4"

load(paste("1953data", signal, adjust, jamming, ".RData", sep = ""))


## rescale distance to East Berlin so that coefficients are not too small
dat$distance_to_berlin <- dat$distance_to_berlin/100


source("hl.R")
probit.out <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)


## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:42,1:2] <- hl(temp[1:42,1:2])



## Wald test RIAS
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[43,1] <- round(1 - pchisq(W, df = nrow(Q)),3)


## Wald test distance
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["distance_to_berlin"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[44,1] <- round(1 - pchisq(W, df = nrow(Q)),3)



#########################################
## probit model 2 --- ITM 3 (without VHF)
#########################################
## choose signal strength measure to employ
signal <- "lrm_3"

load(paste("1953data", signal, adjust, jamming, ".RData", sep = ""))


## rescale distance to East Berlin so that coefficients are not too small
dat$distance_to_berlin <- dat$distance_to_berlin/100

probit.out <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)


## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:42,3:4] <- hl(temp[1:42,1:2])



## Wald test RIAS
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[43,3] <- round(1 - pchisq(W, df = nrow(Q)),3)


## Wald test distance
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["distance_to_berlin"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[44,3] <- round(1 - pchisq(W, df = nrow(Q)),3)



###########################################
## probit model 3 --- TIREM 3 (without VHF)
###########################################
## choose signal strength measure to employ
signal <- "tirem_3"

load(paste("1953data", signal, adjust, jamming, ".RData", sep = ""))


## rescale distance to East Berlin so that coefficients are not too small
dat$distance_to_berlin <- dat$distance_to_berlin/100

probit.out <- glm(protest ~

							county_capital +
							KVP +
							LandForstwirtschaft +
							Bergbau +
							Metallverarbeitung +
							Chemie +
							HolzKunstmassen +
							Verbrauchsgueter +
							Bauwirtschaft +
							Verkehrswesen +
							Handel +
							Dienstleistung +
							log(population) +
							prop_male +
							density +
							pop_change +
							prop_Land_same39 +
							prop_educ_8 +
							prop_educ_10 +
							prop_educ_12 +
							prop_educ_12plus +
							prop_educ_Fachschule +
							prop_educ_Hochschule +
							prop_evangelisch +
							prop_katholisch +
							prop_glaubenslos +
							living_space +
							turnout32 +
							invalid32 +
							NSDAP32 +
							SPD32 +
							KPD32 +
							Z32 +
							DNVP32 +
							DVP32 +

							signal + I(signal^2) + I(signal^3) +
							distance_to_berlin +
							I(distance_to_berlin^2) +
							I(distance_to_berlin^3) +
							as.factor(size) +
							as.factor(district),
							x = TRUE, y = TRUE, model = TRUE,
							data = dat, family = binomial(link = "probit"))	

summary(probit.out)


## clustered standard errors
vcov.cl <- cluster.vcov(probit.out, dat$county)
temp <- coeftest(probit.out, vcov.cl)

table1[1:42,5:6] <- hl(temp[1:42,1:2])



## Wald test RIAS
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["signal"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(signal^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(signal^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[43,5] <- round(1 - pchisq(W, df = nrow(Q)),3)


## Wald test distance
V <- vcov.cl
Q <- matrix(0,3,nrow(V))
Q[1, which(probit.out$coef == probit.out$coef["distance_to_berlin"])] <- 1
Q[2, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^2)"])] <- 1
Q[3, which(probit.out$coef == probit.out$coef["I(distance_to_berlin^3)"])] <- 1

r <- rep(0,3)

theta <- probit.out$coef

W <- t(Q %*% theta - r) %*% solve(Q %*% V %*% t(Q)) %*% (Q %*% theta - r)

## compare to Chi^2 distribution to get p-value
table1[44,5] <- round(1 - pchisq(W, df = nrow(Q)),3)


latex(table1, file = "")
















.




